import requests
from lxml import etree
import pymysql
def get_html(url):
    """Fetch *url* and return the decoded page text, or None on any failure.

    A connect/read timeout is set so a stalled server cannot hang the
    scraper forever (the original call had no timeout).
    """
    try:
        r = requests.get(url, timeout=10)
        # Fail fast on HTTP error status before decoding the body.
        r.raise_for_status()
        # Prefer the encoding sniffed from the content over the header
        # default (many Chinese pages mis-declare their charset).
        r.encoding = r.apparent_encoding
        return r.text
    except Exception as e:
        print(e)
        return None  # explicit: callers must handle the failure case

def parser(html):
    """Parse the book-list page into rows of
    [bookname, score, author, press, pubdate, describe].

    Malformed <li> entries (missing sub-elements or a short info field)
    are skipped instead of aborting the whole scrape with an IndexError.
    """
    doc = etree.HTML(html)
    out_list = []
    for row in doc.xpath('//*[@id="content"]/div//ul/li'):  # one <li> per book
        try:
            bookname = row.xpath('div/h2/a/text()')[0].strip()
            score = row.xpath('div/p[1]/span[2]/text()')[0].strip()
            # "author / press / pubdate" — strip each piece so the DB does
            # not end up storing the surrounding spaces (original bug).
            info = [part.strip()
                    for part in row.xpath('div/p[2]/text()')[0].split('/')]
            describe = row.xpath('div/p[3]/text()')[0].strip()
        except IndexError:
            # This row lacks one of the expected elements; skip it.
            continue
        if len(info) < 3:
            continue  # info field too short to yield author/press/pubdate
        out_list.append([bookname, score, info[0], info[1], info[2], describe])
    return out_list

def save_mysql(sql, val, **dbdata):
    """Bulk-insert *val* rows using *sql* into the MySQL database
    described by the pymysql connection kwargs in *dbdata*.

    Errors are printed (best-effort style, matching the rest of this
    script) and the transaction is rolled back.
    """
    # Initialize to None so the except/finally clauses are safe even when
    # pymysql.connect() itself raises.  (Original bug: an unbound
    # `connect`/`cursor` caused a NameError that masked the real error.)
    connect = None
    cursor = None
    try:
        # 1. open the connection
        connect = pymysql.connect(**dbdata)
        # 2. get a cursor
        cursor = connect.cursor()
        # 3. execute the statement once per row
        cursor.executemany(sql, val)
        # 4. commit the transaction
        connect.commit()
    except Exception as e:
        if connect is not None:
            connect.rollback()
        print(e)
    finally:
        # Close in reverse acquisition order, guarding each handle.
        if cursor is not None:
            cursor.close()
        if connect is not None:
            connect.close()


if __name__ == '__main__':
    url = 'http://www.bspider.top/doubanbook/'
    html = get_html(url)
    # get_html returns None on network failure; bail out early instead of
    # crashing inside parser (etree.HTML(None) raises).
    if html is None:
        print('failed to fetch page; nothing to save')
    else:
        out_list = parser(html)
        # NOTE: a space was added between ")" and "values" — the original
        # two-literal concatenation produced ")values(", which MySQL
        # happens to tolerate but is a latent bug in the statement text.
        sql = ('insert into bookinfo(bookname,score,autor,press,pubdate,describ) '
               'values(%s,%s,%s,%s,%s,%s)')
        dbconf = {
            'host': '127.0.0.1',
            'user': 'root',
            'password': 'admin',
            'db': 'spider',
            'charset': 'utf8',
        }
        save_mysql(sql, out_list, **dbconf)
